Tensorlistfromtensor
将输入张量按照第一个维度拆分成多个张量。假设输入张量的形状为 [N1, N2, N3, …],该算子将输入张量拆分成 N1 个张量,每个输出张量的形状为 [N2, N3, …]。
其中输出张量的序号 i = 0, 1, …, N1-1,每个输出张量包含 N1 个切片中的一个。
- 输入:
input_tensor_values - 输入张量的数据指针,大小为 input_tensor_total_elements 个元素。
input_tensor_shape - 输入张量的形状数组(int* 类型),input_tensor_shape[0] 表示第一个维度的大小(即输出张量的数量)。
input_tensor_total_elements - 输入张量的总元素数(int 类型)。
core_mask - 核掩码(int),仅共享存储版本需要。
- 输出:
output_tensors - 输出张量数组(指针数组),大小为 input_tensor_shape[0]。每个元素 output_tensors[i] 指向第 i 个输出张量的数据。每个输出张量的大小为 input_tensor_total_elements / input_tensor_shape[0] 个元素。
- 支持平台:
FT78NE、MT7004
- 备注:
FT78NE 支持fp32, int8, int16, int32, fp64, cplx64, cplx128
MT7004 支持fp16, fp32, int16, int32, cplx64
算子会复制数据,输出张量与输入张量数据独立
调用前需要确保所有 output_tensors[i] 指向的内存空间足够大(至少 input_tensor_total_elements / input_tensor_shape[0] 个元素)
输出张量的数量等于 input_tensor_shape[0]
每个输出张量的元素数为 input_tensor_total_elements / input_tensor_shape[0]
共享存储版本:
-
void i8_tensorlistfromtensor_s(int8_t *input_tensor_values, int *input_tensor_shape, int8_t **output_tensors, int input_tensor_total_elements, int core_mask)
-
void i16_tensorlistfromtensor_s(int16_t *input_tensor_values, int *input_tensor_shape, int16_t **output_tensors, int input_tensor_total_elements, int core_mask)
-
void i32_tensorlistfromtensor_s(int32_t *input_tensor_values, int *input_tensor_shape, int32_t **output_tensors, int input_tensor_total_elements, int core_mask)
-
void hp_tensorlistfromtensor_s(half *input_tensor_values, int *input_tensor_shape, half **output_tensors, int input_tensor_total_elements, int core_mask)
-
void fp_tensorlistfromtensor_s(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements, int core_mask)
-
void dp_tensorlistfromtensor_s(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements, int core_mask)
-
void c64_tensorlistfromtensor_s(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements, int core_mask)
-
void c128_tensorlistfromtensor_s(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements, int core_mask)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <tensorlistfromtensor.h>
4
5int main(int argc, char* argv[]) {
6 // 假设在DDR空间
7 // 输入张量形状 [3, 4, 5],总元素数 = 3 * 4 * 5 = 60
8 // 输出3个张量,每个形状 [4, 5],元素数 = 60 / 3 = 20
9
10 int input_tensor_shape[] = {3, 4, 5};
11 int input_tensor_total_elements = 3 * 4 * 5; // 60
12
13 // 输入张量数据
14 float *input_tensor_values = (float *)0xA0000000;
15 // input_tensor_values 包含 60 个 float 元素
16
17 // 输出张量数组(需要预先分配内存)
18 float *output0 = (float *)0xB0000000; // 第0个输出张量,20个元素
19 float *output1 = (float *)0xB0100000; // 第1个输出张量,20个元素
20 float *output2 = (float *)0xB0200000; // 第2个输出张量,20个元素
21
22 float* output_tensors[3] = {output0, output1, output2};
23
24 int core_mask = 0xff;
25
26 fp_tensorlistfromtensor_s(input_tensor_values, input_tensor_shape,
27 output_tensors, input_tensor_total_elements, core_mask);
28
29 return 0;
30}
私有存储版本:
-
void i8_tensorlistfromtensor_p(int8_t *input_tensor_values, int *input_tensor_shape, int8_t **output_tensors, int input_tensor_total_elements)
-
void i16_tensorlistfromtensor_p(int16_t *input_tensor_values, int *input_tensor_shape, int16_t **output_tensors, int input_tensor_total_elements)
-
void i32_tensorlistfromtensor_p(int32_t *input_tensor_values, int *input_tensor_shape, int32_t **output_tensors, int input_tensor_total_elements)
-
void hp_tensorlistfromtensor_p(half *input_tensor_values, int *input_tensor_shape, half **output_tensors, int input_tensor_total_elements)
-
void fp_tensorlistfromtensor_p(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements)
-
void dp_tensorlistfromtensor_p(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements)
-
void c64_tensorlistfromtensor_p(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements)
-
void c128_tensorlistfromtensor_p(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <tensorlistfromtensor.h>
4
5int main(int argc, char* argv[]) {
6 // 假设在L2空间
7 int input_tensor_shape[] = {3, 4, 5};
8 int input_tensor_total_elements = 3 * 4 * 5;
9
10 float *input_tensor_values = (float *)0x10000000;
11
12 float *output0 = (float *)0x10010000;
13 float *output1 = (float *)0x10011000;
14 float *output2 = (float *)0x10012000;
15
16 float* output_tensors[3] = {output0, output1, output2};
17
18 fp_tensorlistfromtensor_p(input_tensor_values, input_tensor_shape,
19 output_tensors, input_tensor_total_elements);
20
21 return 0;
22}